Load required packages:
require(data.table)
require(ggplot2)
require(plotly)
require(RSQLite)
Fetch data from db:
# set the file path to db
s.dbpath = "../data/dhtlog.db"
# connect to db
con <- dbConnect(drv=RSQLite::SQLite(), dbname=s.dbpath)
# list all tables
tables <- dbListTables(con)
# exclude sqlite_sequence (contains table information)
tables <- tables[tables != "sqlite_sequence"]
lDataFrames <- vector("list", length=length(tables))
# create a data.frame for each table
for (i in seq(along=tables)) {
lDataFrames[[i]] <- as.data.table(dbGetQuery(conn=con, statement=paste("SELECT * FROM '", tables[[i]], "'", sep="")))
}
# take 1st table only (this is where our data lives)
dt = lDataFrames[[1]]
Get season from dates (adapted from https://stackoverflow.com/a/9501225/1898713):
getSeason <- function(DATES) {
WS <- as.Date("2012-12-21", format = "%Y-%m-%d") # Winter Solstice
SE <- as.Date("2012-3-20", format = "%Y-%m-%d") # Spring Equinox
SS <- as.Date("2012-6-21", format = "%Y-%m-%d") # Summer Solstice
FE <- as.Date("2012-9-22", format = "%Y-%m-%d") # Fall Equinox
# Convert dates from any year to 2012 dates
d <- as.Date(strftime(DATES, format="2012-%m-%d"))
ifelse (d >= WS | d < SE, "Winter",
ifelse (d >= SE & d < SS, "Spring",
ifelse (d >= SS & d < FE, "Summer", "Fall")))
}
# add a column 'season' to our data
dt[, season := getSeason(timestamp)]
# order season column
dt[, season := factor(season, levels = c('Winter', 'Spring', 'Summer', 'Fall'))]
Sometimes sensors malfunction and give strange readings such as very low temperature or very high humidity. Remove such measurements:
dt = dt[temp > 10 & humid < 100]
Plot interactive scatter plot with points coloured by season. To avoid slowdown in plotly there are nPts random points selected and plotted for each season.
# number of points to select
nPts = 500
# create a new trimmed dt with nPts random points from each season
dt.short = dt[, .SD[sample(.N, min(.N, nPts))], by = season]
# create a ggplot
p1 = ggplot(dt.short, aes(x = temp, y = humid, label = timestamp)) +
geom_point(alpha = 0.3, aes(fill = season), colour = NA) +
xlab('\nTemperature') +
ylab('Humidity\n') +
theme_bw()
# interactive scatter plot
ggplotly(p1)
LS0tCnRpdGxlOiAiUGxvdCB0ZW1wZXJhdHVyZSB2cyBodW1pZGl0eSIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6IGRlZmF1bHQKICBodG1sX25vdGVib29rOiBkZWZhdWx0Ci0tLQoKTG9hZCByZXF1aXJlZCBwYWNrYWdlczoKCmBgYHtyfQpyZXF1aXJlKGRhdGEudGFibGUpCnJlcXVpcmUoZ2dwbG90MikKcmVxdWlyZShwbG90bHkpCnJlcXVpcmUoUlNRTGl0ZSkKYGBgCgoKRmV0Y2ggZGF0YSBmcm9tIGRiOgpgYGB7cn0KIyBzZXQgdGhlIGZpbGUgcGF0aCB0byBkYgpzLmRicGF0aCA9ICIuLi9kYXRhL2RodGxvZy5kYiIKCiMgY29ubmVjdCB0byBkYgpjb24gPC0gZGJDb25uZWN0KGRydj1SU1FMaXRlOjpTUUxpdGUoKSwgZGJuYW1lPXMuZGJwYXRoKQoKIyBsaXN0IGFsbCB0YWJsZXMKdGFibGVzIDwtIGRiTGlzdFRhYmxlcyhjb24pCgojIGV4Y2x1ZGUgc3FsaXRlX3NlcXVlbmNlIChjb250YWlucyB0YWJsZSBpbmZvcm1hdGlvbikKdGFibGVzIDwtIHRhYmxlc1t0YWJsZXMgIT0gInNxbGl0ZV9zZXF1ZW5jZSJdCgpsRGF0YUZyYW1lcyA8LSB2ZWN0b3IoImxpc3QiLCBsZW5ndGg9bGVuZ3RoKHRhYmxlcykpCgojIGNyZWF0ZSBhIGRhdGEuZnJhbWUgZm9yIGVhY2ggdGFibGUKZm9yIChpIGluIHNlcShhbG9uZz10YWJsZXMpKSB7CiAgbERhdGFGcmFtZXNbW2ldXSA8LSBhcy5kYXRhLnRhYmxlKGRiR2V0UXVlcnkoY29ubj1jb24sIHN0YXRlbWVudD1wYXN0ZSgiU0VMRUNUICogRlJPTSAnIiwgdGFibGVzW1tpXV0sICInIiwgc2VwPSIiKSkpCn0KCiMgdGFrZSAxc3QgdGFibGUgb25seSAodGhpcyBpcyB3aGVyZSBvdXIgZGF0YSBsaXZlcykKZHQgPSBsRGF0YUZyYW1lc1tbMV1dCmBgYAoKCkdldCBzZWFzb24gZnJvbSBkYXRlcyAoYWRhcHRlZCBmcm9tIGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vYS85NTAxMjI1LzE4OTg3MTMpOgoKYGBge3J9CmdldFNlYXNvbiA8LSBmdW5jdGlvbihEQVRFUykgewogIFdTIDwtIGFzLkRhdGUoIjIwMTItMTItMjEiLCBmb3JtYXQgPSAiJVktJW0tJWQiKSAjIFdpbnRlciBTb2xzdGljZQogIFNFIDwtIGFzLkRhdGUoIjIwMTItMy0yMCIsICBmb3JtYXQgPSAiJVktJW0tJWQiKSAjIFNwcmluZyBFcXVpbm94CiAgU1MgPC0gYXMuRGF0ZSgiMjAxMi02LTIxIiwgIGZvcm1hdCA9ICIlWS0lbS0lZCIpICMgU3VtbWVyIFNvbHN0aWNlCiAgRkUgPC0gYXMuRGF0ZSgiMjAxMi05LTIyIiwgIGZvcm1hdCA9ICIlWS0lbS0lZCIpICMgRmFsbCBFcXVpbm94CiAgCiAgIyBDb252ZXJ0IGRhdGVzIGZyb20gYW55IHllYXIgdG8gMjAxMiBkYXRlcwogIGQgPC0gYXMuRGF0ZShzdHJmdGltZShEQVRFUywgZm9ybWF0PSIyMDEyLSVtLSVkIikpCiAgCiAgaWZlbHNlIChkID49IFdTIHwgZCA8IFNFLCAiV2ludGVyIiwKICAgICAgICAgIGlmZWxzZSAoZCA+PSBTRSAmIGQgPCBTUywgIlNwcmluZyIsCiAgICAgICAgICAgICAgICAgIGlmZWxzZSAoZCA+PSBTUyAmIGQgPCBGRSwgIlN1bW1lciIsICJGYWxsIikpKQp9CgojIGFkZCBhIGNvbHVtbiAnc2Vhc29uJyB0byBvdXIgZGF0YQpkdFssIHNlYXNvbiA6PSBnZXRTZWFzb24odGltZXN0YW1wKV0KCiMgb3JkZXIgc2Vhc29uIGNvbHVtbgpkdFssIHNlYXNvbiA6PSBmYWN0b3Ioc2Vhc29uLCBsZXZlbHMgPSBjKCdXaW50ZXInLCAnU3ByaW5nJywgJ1N1bW1lcicsICdGYWxsJykpXQpgYGAKClNvbWV0aW1lcyBzZW5zb3JzIG1hbGZ1bmN0aW9uIGFuZCBnaXZlIHN0cmFuZ2UgcmVhZGluZ3Mgc3VjaCBhcyB2ZXJ5IGxvdyB0ZW1wZXJhdHVyZSBvciB2ZXJ5IGhpZ2ggaHVtaWRpdHkuIFJlbW92ZSBzdWNoIG1lYXN1cmVtZW50czoKYGBge3J9CmR0ID0gZHRbdGVtcCA+IDEwICYgaHVtaWQgPCAxMDBdCmBgYAoKClBsb3QgaW50ZXJhY3RpdmUgc2NhdHRlciBwbG90IHdpdGggcG9pbnRzIGNvbG91cmVkIGJ5IHNlYXNvbi4gVG8gYXZvaWQgc2xvd2Rvd24gaW4gYHBsb3RseWAgdGhlcmUgYXJlIGBuUHRzYCByYW5kb20gcG9pbnRzIHNlbGVjdGVkIGFuZCBwbG90dGVkIGZvciBlYWNoIHNlYXNvbi4KCmBgYHtyfQojIG51bWJlciBvZiBwb2ludHMgdG8gc2VsZWN0Cm5QdHMgPSA1MDAKCiMgY3JlYXRlIGEgbmV3IHRyaW1tZWQgZHQgd2l0aCBuUHRzIHJhbmRvbSBwb2ludHMgZnJvbSBlYWNoIHNlYXNvbgpkdC5zaG9ydCA9IGR0WywgLlNEW3NhbXBsZSguTiwgbWluKC5OLCBuUHRzKSldLCBieSA9IHNlYXNvbl0KCiMgY3JlYXRlIGEgZ2dwbG90CnAxID0gZ2dwbG90KGR0LnNob3J0LCBhZXMoeCA9IHRlbXAsIHkgPSBodW1pZCwgbGFiZWwgPSB0aW1lc3RhbXApKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuMywgYWVzKGZpbGwgPSBzZWFzb24pLCBjb2xvdXIgPSBOQSkgKwogIHhsYWIoJ1xuVGVtcGVyYXR1cmUnKSArIAogIHlsYWIoJ0h1bWlkaXR5XG4nKSArCiAgdGhlbWVfYncoKQoKIyBpbnRlcmFjdGl2ZSBzY2F0dGVyIHBsb3QKZ2dwbG90bHkocDEpCgpgYGAKCg==